
setwd("./nonesharable_data/comtrade/")
comtradeFiles <- list.files(pattern = "comtrade.*")
comtradeList <- lapply(comtradeFiles, read.csv)
# reset working directory
replicate(2, setwd("../"))
trade_goods <- dplyr::bind_rows(comtradeList)

countries <-  c("ARG", "BOL", "CHL", "COL", "CRI", 
                "DOM", "ECU", "SLV", "GTM", "HND",
                "MEX", "NIC", "PAN", "PRY", "PER", 
                "URY", "VEN")

list_EU <- c("AUT", "BEL", "BRG", "CYP", "CZE", "DEU", "DNK",
             "EST", "ESP", "FIN", "FRA", "GBR", "GRC", "HRV",
             "HUN", "IRL", "ITA", "LTU", "LUX", "LVA", "MLT",
             "NLD", "POL", "PRT", "ROU", "SVK", "SVN", "SWE")

list_ALBA <- c("ATG", "DMA", "GRD", "CUB", "NIC", "KNA", "LCA", "VCT", "VEN",
               "HND", "ECU", "BOL")

list_PA <- c("CHL", "COL", "PER", "MEX")

trade_goods <- trade_goods[,c("Reporter.ISO", "Partner.ISO", "Year", "Trade.Flow", "Commodity.Code", "Trade.Value..US..")]
names(trade_goods) <- c("country", "partner", "year", "flow", "SITC3_group", "value")
trade_goods$flow <- car::recode(trade_goods$flow, "'Import' = 'M'; 'Export' = 'X'")

# Various years missing in trade data. Close gaps by last-observation-carried-forward
# only pan17 is actually used for the main analysis. col19, cri19, ecu19, and gtm18 are only used for endogeneity tests. hnd08 and ven14 are not used at all in the analysis but replaced here for ensuring complete data frames.
col19 <- subset(trade_goods, country == "COL" & year == 2018) %>%
  mutate(year = 2019)
cri19 <- subset(trade_goods, country == "CRI" & year == 2018) %>%
  mutate(year = 2019)
ecu19 <- subset(trade_goods, country == "ECU" & year == 2018) %>%
  mutate(year = 2019)
gtm18 <- subset(trade_goods, country == "GTM" & year == 2017) %>%
  mutate(year = 2018)
hnd08 <- subset(trade_goods, country == "HND" & year == 2007) %>%
  mutate(year = 2008)
pan17 <- subset(trade_goods, country == "PAN" & year == 2016) %>%
  mutate(year = 2017)
ven14 <- subset(trade_goods, country == "VEN" & year == 2013) %>%
  mutate(year = 2014)

trade_goods <- rbind(trade_goods, col19, cri19, ecu19, gtm18, hnd08, pan17, ven14)

# load raw services data
trade_services <- read.csv(file = "./nonesharable_data/comtrade/OECD-WTO_BATIS_data.csv")

trade_services <- trade_services %>%
  dplyr::filter(Partner %in% c("WL", countrycode::countrycode(unique(trade_goods$partner), origin = "iso3c", destination = "iso2c")) & Year >=2003 & Item_code != "S200") %>%
  mutate(value = Balanced_value * 1e+6, 
         flow = Flow,
         year = Year,
         EBOPS_code = Item_code,
         country = countrycode::countrycode(sourcevar = Reporter, origin = "iso2c", destination = "iso3c"),
         partner = ifelse(Partner == "WL", "WLD", countrycode::countrycode(sourcevar = Partner, origin = "iso2c", destination = "iso3c"))) %>%
  dplyr::select(c("country", "partner", "year", "flow", "EBOPS_code", "value"))

# services trade data ends in 2012
trade_services2013 <- subset(trade_services, year == 2012) %>%
  mutate(year = 2013)
trade_services2014 <- subset(trade_services, year == 2012) %>%
  mutate(year = 2014)
trade_services2015 <- subset(trade_services, year == 2012) %>%
  mutate(year = 2015)
trade_services2016 <- subset(trade_services, year == 2012) %>%
  mutate(year = 2016)
trade_services2017 <- subset(trade_services, year == 2012) %>%
  mutate(year = 2017)
trade_services2018 <- subset(trade_services, year == 2012) %>%
  mutate(year = 2018)
trade_services2019 <- subset(trade_services, year == 2012) %>%
  mutate(year = 2019)
trade_services <- rbind(trade_services, trade_services2013, trade_services2014, trade_services2015, trade_services2016, trade_services2017, trade_services2018, trade_services2019)

# combine goods and services data
trade_data <- trade_goods %>%
  bind_rows(., trade_services) %>% 
  mutate(agreement = ifelse(partner %in% list_EU, "EU",
                            ifelse(partner %in% list_ALBA, "ALBA",
                                   ifelse(partner %in% list_PA, "PA",
                                          ifelse(partner == "USA", "USA", 
                                                 ifelse(partner == "CHN", "CHN",
                                                        ifelse(partner == "WLD", "WLD", NA)))))))   %>%
  group_by(agreement, year, flow, country) %>%
  summarise(total_trade = sum(value, na.rm=T))

saveRDS(trade_data, "./nonesharable_data/trade_clean.RDS")




